Cream of the Crop 20

home *** CD-ROM | disk | FTP | other *** search

/ Cream of the Crop 20 / Cream of the Crop 20 (Terry Blount) (1996).iso / os2 / xdsn217.zip / SAMPLES / SIMPLE / linnew.c < prev next >

Wrap

C/C++ Source or Header | 1996-06-04 | 23KB | 881 lines

/* ** ** LINPACK.C Linpack benchmark, calculates FLOPS. ** (FLoating Point Operations Per Second) ** ** Translated to C by Bonnie Toy 5/88 ** ** Modified by Will Menninger, 10/93, with these features: ** (modified on 2/25/94 to fix a problem with daxpy for ** unequal increments or equal increments not equal to 1. ** Jack Dongarra) ** ** - Defaults to double precision. ** - Averages ROLLed and UNROLLed performance. ** - User selectable array sizes. ** - Automatically does enough repetitions to take at least 50 CPU seconds. ** - Prints machine precision. ** - ANSI prototyping. ** ** To compile: cc -O -o linpack linpack.c -lm ** ** */ #include <stdio.h> #include <stdlib.h> #include <math.h> #include <time.h> #include <float.h> #define DP #ifdef SP #define ZERO 0.0 #define ONE 1.0 #define PREC "Single" #define BASE10DIG FLT_DIG typedef float REAL; #endif #ifdef DP #define ZERO 0.0e0 #define ONE 1.0e0 #define PREC "Double" #define BASE10DIG DBL_DIG typedef double REAL; #endif static REAL linpack (long nreps,int arsize); static void matgen (REAL *a,int lda,int n,REAL *b,REAL *norma); static void dgefa (REAL *a,int lda,int n,int *ipvt,int *info,int roll); static void dgesl (REAL *a,int lda,int n,int *ipvt,REAL *b,int job,int roll); static void daxpy_r (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy); static REAL ddot_r (int n,REAL *dx,int incx,REAL *dy,int incy); static void dscal_r (int n,REAL da,REAL *dx,int incx); static void daxpy_ur (int n,REAL da,REAL *dx,int incx,REAL *dy,int incy); static REAL ddot_ur (int n,REAL *dx,int incx,REAL *dy,int incy); static void dscal_ur (int n,REAL da,REAL *dx,int incx); static int idamax (int n,REAL *dx,int incx); static REAL second (void); static void *mempool; void main(void) { char buf[80]; int arsize; long arsize2d,memreq,nreps; size_t malloc_arg; while (1) { printf("Enter array size (q to quit) [200]: "); fgets(buf,79,stdin); if (buf[0]=='q' || buf[0]=='Q') break; if (buf[0]=='\0' || buf[0]=='\n') arsize=200; else arsize=atoi(buf); arsize/=2; arsize*=2; if (arsize<10) { printf("Too small.\n"); continue; } arsize2d = (long)arsize*(long)arsize; memreq=arsize2d*sizeof(REAL)+(long)arsize*sizeof(REAL)+(long)arsize*sizeof(int); printf("Memory required: %ldK.\n",(memreq+512L)>>10); malloc_arg=(size_t)memreq; if (malloc_arg!=memreq || (mempool=malloc(malloc_arg))==NULL) { printf("Not enough memory available for given array size.\n\n"); continue; } printf("\n\nLINPACK benchmark, %s precision.\n",PREC); printf("Machine precision: %d digits.\n",BASE10DIG); printf("Array size %d X %d.\n",arsize,arsize); printf("Average rolled and unrolled performance:\n\n"); printf(" Reps Time(s) DGEFA DGESL OVERHEAD KFLOPS\n"); printf("----------------------------------------------------\n"); nreps=1; while (linpack(nreps,arsize)<40.) nreps*=2; free(mempool); printf("\n"); } } static REAL linpack(long nreps,int arsize) { REAL *a,*b; REAL norma,t1,kflops,tdgesl,tdgefa,totalt,toverhead,ops; int *ipvt,n,info,lda; long i,arsize2d; lda = arsize; n = arsize/2; arsize2d = (long)arsize*(long)arsize; ops=((2.0*n*n*n)/3.0+2.0*n*n); a=(REAL *)mempool; b=a+arsize2d; ipvt=(int *)&b[arsize]; tdgesl=0; tdgefa=0; totalt=second(); for (i=0;i<nreps;i++) { matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info,1); tdgefa += second()-t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0,1); tdgesl += second()-t1; } for (i=0;i<nreps;i++) { matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info,0); tdgefa += second()-t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0,0); tdgesl += second()-t1; } totalt=second()-totalt; if (totalt<0.5 || tdgefa+tdgesl<0.2) return(0.); kflops=2.*nreps*ops/(1000.*(tdgefa+tdgesl)); toverhead=totalt-tdgefa-tdgesl; if (tdgefa<0.) tdgefa=0.; if (tdgesl<0.) tdgesl=0.; if (toverhead<0.) toverhead=0.; printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n", nreps,totalt,100.*tdgefa/totalt, 100.*tdgesl/totalt,100.*toverhead/totalt, kflops); return(totalt); } /* ** For matgen, ** We would like to declare a[][lda], but c does not allow it. In this ** function, references to a[i][j] are written a[lda*i+j]. */ static void matgen(REAL *a,int lda,int n,REAL *b,REAL *norma) { int init,i,j; init = 1325; *norma = 0.0; for (j = 0; j < n; j++) for (i = 0; i < n; i++) { init = (int)((long)3125*(long)init % 65536L); a[lda*j+i] = (init - 32768.0)/16384.0; *norma = (a[lda*j+i] > *norma) ? a[lda*j+i] : *norma; } for (i = 0; i < n; i++) b[i] = 0.0; for (j = 0; j < n; j++) for (i = 0; i < n; i++) b[i] = b[i] + a[lda*j+i]; } /* ** ** DGEFA benchmark ** ** We would like to declare a[][lda], but c does not allow it. In this ** function, references to a[i][j] are written a[lda*i+j]. ** ** dgefa factors a double precision matrix by gaussian elimination. ** ** dgefa is usually called by dgeco, but it can be called ** directly with a saving in time if rcond is not needed. ** (time for dgeco) = (1 + 9/n)*(time for dgefa) . ** ** on entry ** ** a REAL precision[n][lda] ** the matrix to be factored. ** ** lda integer ** the leading dimension of the array a . ** ** n integer ** the order of the matrix a . ** ** on return ** ** a an upper triangular matrix and the multipliers ** which were used to obtain it. ** the factorization can be written a = l*u where ** l is a product of permutation and unit lower ** triangular matrices and u is upper triangular. ** ** ipvt integer[n] ** an integer vector of pivot indices. ** ** info integer ** = 0 normal value. ** = k if u[k][k] .eq. 0.0 . this is not an error ** condition for this subroutine, but it does ** indicate that dgesl or dgedi will divide by zero ** if called. use rcond in dgeco for a reliable ** indication of singularity. ** ** linpack. this version dated 08/14/78 . ** cleve moler, university of New Mexico, argonne national lab. ** ** functions ** ** blas daxpy,dscal,idamax ** */ static void dgefa(REAL *a,int lda,int n,int *ipvt,int *info,int roll) { REAL t; int idamax(),j,k,kp1,l,nm1; /* gaussian elimination with partial pivoting */ if (roll) { *info = 0; nm1 = n - 1; if (nm1 >= 0) for (k = 0; k < nm1; k++) { kp1 = k + 1; /* find l = pivot index */ l = idamax(n-k,&a[lda*k+k],1) + k; ipvt[k] = l; /* zero pivot implies this column already triangularized */ if (a[lda*k+l] != ZERO) { /* interchange if necessary */ if (l != k) { t = a[lda*k+l]; a[lda